Equal
逐元素计算两个输入是否相等
\[\begin{split}output_i = \begin{cases}
\text{True}, & \text{if } Input0_i = Input1_i \\
\text{False}, & \text{if } Input0_i \neq Input1_i
\end{cases}\end{split}\]
- 输入:
Input0 - 第一个输入数据地址。
Input1 - 第二个输入数据地址。
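- params - 其他参数按下列顺序打包成 long long 数组(地址需转换为整数后存入):
params[0]: input0_dims - Input0 的维度数组地址。
params[1]: input1_dims - Input1 的维度数组地址。
params[2]: output_dims - output 的维度数组地址。
params[3]: strides0 - Input0 的步长数组地址,大小为 8*sizeof(int)。
params[4]: strides1 - Input1 的步长数组地址,大小为 8*sizeof(int)。
params[5]: strides_output - Output 的步长数组地址,大小为 8*sizeof(int)。
params[6]: num_dims - 维度数量。
core_mask - 核掩码(仅共享存储版本需要;在函数原型中作为独立的 int 参数传入,而不是 params 数组的元素)。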
- 输出:
output - 计算结果地址。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持int8, int16, int32, fp32, fp64, cplx64, cplx128
MT7004 支持fp16, fp32, int16, int32, cplx64
共享存储版本:
-
void i8_equal_s(int8_t *Input0, int8_t *Input1, bool *output, long long *params, int core_mask)
-
void i16_equal_s(int16_t *Input0, int16_t *Input1, bool *output, long long *params, int core_mask)
-
void i32_equal_s(int *Input0, int *Input1, bool *output, long long *params, int core_mask)
-
void hp_equal_s(half *Input0, half *Input1, bool *output, long long *params, int core_mask)
-
void fp_equal_s(float *Input0, float *Input1, bool *output, long long *params, int core_mask)
-
void dp_equal_s(double *Input0, double *Input1, bool *output, long long *params, int core_mask)
-
void c64_equal_s(float *Input0, float *Input1, bool *output, long long *params, int core_mask)
-
void c128_equal_s(double *Input0, double *Input1, bool *output, long long *params, int core_mask)
C调用示例:
1//FT78NE示例 2#include <stdio.h> 3#include <equal.h> 4 5int main(int argc, char* argv[]) { 6 float *input0 = (float *)0x81000000; 7 float *input1 = (float *)0x82000000; 8 bool *output = (bool *)0x83000000; 9 int *strides0 = (int*)0x84000000; 10 int *strides1 = (int*)0x85000000; 11 int *strides_output = (int*)0x86000000; 12 13 int i = 0; 14 15 srand(seed++); 16 17 float f_min = -100.0; 18 float f_max = 100.0; 19 20 // same shape 21 int input0_dims[] = {4, 8, 17}; // 2x2 22 int input1_dims[] = {4, 8, 17}; // 2x2 23 int output_dims[] = {4, 8, 17}; // 2x2 24 int num_dims = 3; 25 26 unsigned long long params[9]; 27 params[0] = (unsigned long long)input0_dims; 28 params[1] = (unsigned long long)input1_dims; 29 params[2] = (unsigned long long)output_dims; 30 params[3] = (unsigned long long)strides0; 31 params[4] = (unsigned long long)strides1; 32 params[5] = (unsigned long long)strides_output; 33 params[6] = (unsigned long long)num_dims; 34 35 int total_input0 = get_total_elements(num_dims, input0_dims); 36 int total_input1 = get_total_elements(num_dims, input1_dims); 37 int total_output = get_total_elements(num_dims, output_dims); 38 39 for(i = 0;i < total_input0;++i) { 40 input0[i] = f_min + ((float)rand() / (float)RAND_MAX) * (f_max - f_min); 41 } 42 43 for(i = 0;i < total_input1;++i) { 44 input1[i] = f_min + ((float)rand() / (float)RAND_MAX) * (f_max - f_min); 45 } 46 int core_mask = 0b1111; 47 fp_equal_s(input0, input1, output, params, core_mask);//调用汇编 48 return 0; 49}
私有存储版本:
-
void i8_equal_p(int8_t *Input0, int8_t *Input1, bool *output, long long *params)
-
void i16_equal_p(int16_t *Input0, int16_t *Input1, bool *output, long long *params)
-
void i32_equal_p(int32_t *Input0, int32_t *Input1, bool *output, long long *params)
-
void hp_equal_p(half *Input0, half *Input1, bool *output, long long *params)
-
void fp_equal_p(float *Input0, float *Input1, bool *output, long long *params)
-
void dp_equal_p(double *Input0, double *Input1, bool *output, long long *params)
-
void c64_equal_p(float *Input0, float *Input1, bool *output, long long *params)
-
void c128_equal_p(double *Input0, double *Input1, bool *output, long long *params)
C调用示例:
1//FT78NE示例 2#include <stdio.h> 3#include <equal.h> 4 5int main(int argc, char* argv[]) { 6 float *input0 = (float *)0x10010000; 7 float *input1 = (float *)0x10020000; 8 bool *output = (bool *)0x10030000; 9 int *strides0 = (int*)0x10050000; 10 int *strides1 = (int*)0x10060000; 11 int *strides_output = (int*)0x10070000; 12 13 int i = 0; 14 15 srand(seed++); 16 17 float f_min = -100.0; 18 float f_max = 100.0; 19 20 // same shape 21 int input0_dims[] = {4, 8, 17}; // 2x2 22 int input1_dims[] = {4, 8, 17}; // 2x2 23 int output_dims[] = {4, 8, 17}; // 2x2 24 int num_dims = 3; 25 26 unsigned long long params[9]; 27 params[0] = (unsigned long long)input0_dims; 28 params[1] = (unsigned long long)input1_dims; 29 params[2] = (unsigned long long)output_dims; 30 params[3] = (unsigned long long)strides0; 31 params[4] = (unsigned long long)strides1; 32 params[5] = (unsigned long long)strides_output; 33 params[6] = (unsigned long long)num_dims; 34 35 int total_input0 = get_total_elements(num_dims, input0_dims); 36 int total_input1 = get_total_elements(num_dims, input1_dims); 37 int total_output = get_total_elements(num_dims, output_dims); 38 39 for(i = 0;i < total_input0;++i) { 40 input0[i] = f_min + ((float)rand() / (float)RAND_MAX) * (f_max - f_min); 41 } 42 43 for(i = 0;i < total_input1;++i) { 44 input1[i] = f_min + ((float)rand() / (float)RAND_MAX) * (f_max - f_min); 45 } 46 47 fp_equal_p(input0, input1, output, params);//调用汇编 48 return 0; 49}